from selenium import webdriver
from pyquery import PyQuery as pq
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json

# Shared headless Chrome session used by every function in this module.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
# Pass the options via ``options=``: the older ``chrome_options=`` keyword is
# deprecated and is rejected by recent Selenium releases.
driver = webdriver.Chrome(options=options)
# Explicit waits on this driver give up after 10 seconds.
wait = WebDriverWait(driver, 10)

def get_page(url):
    """Load *url* in the shared browser and return the rendered page source.

    Blocks until an element matching ``ul.col5`` is present in the DOM, so
    the returned HTML includes that list.

    Args:
        url: Address of the page to load.

    Returns:
        The page source reported by the driver once the wait has succeeded.

    Raises:
        selenium.common.exceptions.TimeoutException: If ``ul.col5`` does not
            appear before the module-level ``wait`` times out.
    """
    driver.get(url)
    # Announce success only after the wait has actually succeeded; on a
    # timeout the exception propagates without a misleading message.
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "ul.col5")))
    print('加载成功')
    return driver.page_source

def parse_page(html):
    """Extract every ``ul.col5 li`` entry from *html* and persist it.

    For each list item a dict with the keys ``index``, ``song``, ``singer``,
    ``days`` and ``trend`` is built from the item's text content and handed
    to ``write_to_file``.

    Args:
        html: Markup of the page to scrape.
    """
    document = pq(html, parser="html")
    entries = document('ul.col5 li').items()
    for entry in entries:
        index_text = entry('span').eq(0).text()
        song_text = entry('.intro a').text()
        # Drop non-breaking spaces from the text of the item's <p> elements.
        singer_text = entry('p').text().replace('\xa0', '')
        days_text = entry.find('.days').text()
        trend_text = entry('.trend').text()

        record = {
            'index': index_text,
            'song': song_text,
            'singer': singer_text,
            'days': days_text,
            'trend': trend_text,
        }
        write_to_file(record)

def write_to_file(content):
    """Append *content* to ``doubanmusic.txt`` as one JSON line.

    Args:
        content: A JSON-serializable object; it is written on its own line.
    """
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file is
    # opened as UTF-8 explicitly instead of relying on the platform default
    # encoding, which may be unable to represent them.
    with open('doubanmusic.txt', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')

def main():
    """Fetch the Douban music chart page and store every parsed entry.

    The shared browser session is shut down afterwards, whether or not the
    scrape succeeded, so no browser process is left running. Because the
    module-level ``driver`` is closed, this function is meant to be called
    once per process.
    """
    url = 'https://music.douban.com/chart'
    try:
        html = get_page(url)
        parse_page(html)
    finally:
        # Always release the browser, even if loading or parsing raised.
        driver.quit()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()